#import libraries 
library(dplyr)
library(readr)
library (visdat)
library(superml)

#load the raw churn CSV into a tibble named df
#(the file's first column has no header, so readr auto-names it `...1`)
df <- readr::read_csv(
  file = '/Users/Amanda.Hartzler/Desktop/Data_Analytics_Masters/D206/churn_raw_data.csv'
)
New names:
• `` -> `...1`
Rows: 10000 Columns: 52
── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (28): Customer_id, Interaction, City, State, County, Area, Timezone, Job,...
dbl (24): ...1, CaseOrder, Zip, Lat, Lng, Population, Children, Age, Income, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#preview the first rows of the data
head(x = df)
#list every column together with its type and leading values
utils::str(object = df)
spec_tbl_df [10,000 × 52] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ...1                : num [1:10000] 1 2 3 4 5 6 7 8 9 10 ...
 $ CaseOrder           : num [1:10000] 1 2 3 4 5 6 7 8 9 10 ...
 $ Customer_id         : chr [1:10000] "K409198" "S120509" "K191035" "D90850" ...
 $ Interaction         : chr [1:10000] "aa90260b-4141-4a24-8e36-b04ce1f4f77b" "fb76459f-c047-4a9d-8af9-e0f7d4ac2524" "344d114c-3736-4be5-98f7-c72c281e2d35" "abfa2b40-2d43-4994-b15a-989b8c79e311" ...
 $ City                : chr [1:10000] "Point Baker" "West Branch" "Yamhill" "Del Mar" ...
 $ State               : chr [1:10000] "AK" "MI" "OR" "CA" ...
 $ County              : chr [1:10000] "Prince of Wales-Hyder" "Ogemaw" "Yamhill" "San Diego" ...
 $ Zip                 : num [1:10000] 99927 48661 97148 92014 77461 ...
 $ Lat                 : num [1:10000] 56.3 44.3 45.4 33 29.4 ...
 $ Lng                 : num [1:10000] -133.4 -84.2 -123.2 -117.2 -95.8 ...
 $ Population          : num [1:10000] 38 10446 3735 13863 11352 ...
 $ Area                : chr [1:10000] "Urban" "Urban" "Urban" "Suburban" ...
 $ Timezone            : chr [1:10000] "America/Sitka" "America/Detroit" "America/Los_Angeles" "America/Los_Angeles" ...
 $ Job                 : chr [1:10000] "Environmental health practitioner" "Programmer, multimedia" "Chief Financial Officer" "Solicitor" ...
 $ Children            : num [1:10000] NA 1 4 1 0 3 0 2 2 NA ...
 $ Age                 : num [1:10000] 68 27 50 48 83 83 NA NA 49 86 ...
 $ Education           : chr [1:10000] "Master's Degree" "Regular High School Diploma" "Regular High School Diploma" "Doctorate Degree" ...
 $ Employment          : chr [1:10000] "Part Time" "Retired" "Student" "Retired" ...
 $ Income              : num [1:10000] 28562 21705 NA 18925 40074 ...
 $ Marital             : chr [1:10000] "Widowed" "Married" "Widowed" "Married" ...
 $ Gender              : chr [1:10000] "Male" "Female" "Female" "Male" ...
 $ Churn               : chr [1:10000] "No" "Yes" "No" "No" ...
 $ Outage_sec_perweek  : num [1:10000] 6.97 12.01 10.25 15.21 8.96 ...
 $ Email               : num [1:10000] 10 12 9 15 16 15 10 16 20 18 ...
 $ Contacts            : num [1:10000] 0 0 0 2 2 3 0 0 2 1 ...
 $ Yearly_equip_failure: num [1:10000] 1 1 1 0 1 1 1 0 3 0 ...
 $ Techie              : chr [1:10000] "No" "Yes" "Yes" "Yes" ...
 $ Contract            : chr [1:10000] "One year" "Month-to-month" "Two Year" "Two Year" ...
 $ Port_modem          : chr [1:10000] "Yes" "No" "Yes" "No" ...
 $ Tablet              : chr [1:10000] "Yes" "Yes" "No" "No" ...
 $ InternetService     : chr [1:10000] "Fiber Optic" "Fiber Optic" "DSL" "DSL" ...
 $ Phone               : chr [1:10000] "Yes" "Yes" "Yes" "Yes" ...
 $ Multiple            : chr [1:10000] "No" "Yes" "Yes" "No" ...
 $ OnlineSecurity      : chr [1:10000] "Yes" "Yes" "No" "Yes" ...
 $ OnlineBackup        : chr [1:10000] "Yes" "No" "No" "No" ...
 $ DeviceProtection    : chr [1:10000] "No" "No" "No" "No" ...
 $ TechSupport         : chr [1:10000] "No" "No" "No" "No" ...
 $ StreamingTV         : chr [1:10000] "No" "Yes" "No" "Yes" ...
 $ StreamingMovies     : chr [1:10000] "Yes" "Yes" "Yes" "No" ...
 $ PaperlessBilling    : chr [1:10000] "Yes" "Yes" "Yes" "Yes" ...
 $ PaymentMethod       : chr [1:10000] "Credit Card (automatic)" "Bank Transfer(automatic)" "Credit Card (automatic)" "Mailed Check" ...
 $ Tenure              : num [1:10000] 6.8 1.16 15.75 17.09 1.67 ...
 $ MonthlyCharge       : num [1:10000] 171 243 159 120 151 ...
 $ Bandwidth_GB_Year   : num [1:10000] 905 801 2055 2165 271 ...
 $ item1               : num [1:10000] 5 3 4 4 4 3 6 2 5 2 ...
 $ item2               : num [1:10000] 5 4 4 4 4 3 5 2 4 2 ...
 $ item3               : num [1:10000] 5 3 2 4 4 3 6 2 4 2 ...
 $ item4               : num [1:10000] 3 3 4 2 3 2 4 5 3 2 ...
 $ item5               : num [1:10000] 4 4 4 5 4 4 1 2 4 5 ...
 $ item6               : num [1:10000] 4 3 3 4 4 3 5 3 3 2 ...
 $ item7               : num [1:10000] 3 4 3 3 4 3 5 4 4 3 ...
 $ item8               : num [1:10000] 4 4 3 3 5 3 5 5 4 3 ...
 - attr(*, "spec")=
  .. cols(
  ..   ...1 = col_double(),
  ..   CaseOrder = col_double(),
  ..   Customer_id = col_character(),
  ..   Interaction = col_character(),
  ..   City = col_character(),
  ..   State = col_character(),
  ..   County = col_character(),
  ..   Zip = col_double(),
  ..   Lat = col_double(),
  ..   Lng = col_double(),
  ..   Population = col_double(),
  ..   Area = col_character(),
  ..   Timezone = col_character(),
  ..   Job = col_character(),
  ..   Children = col_double(),
  ..   Age = col_double(),
  ..   Education = col_character(),
  ..   Employment = col_character(),
  ..   Income = col_double(),
  ..   Marital = col_character(),
  ..   Gender = col_character(),
  ..   Churn = col_character(),
  ..   Outage_sec_perweek = col_double(),
  ..   Email = col_double(),
  ..   Contacts = col_double(),
  ..   Yearly_equip_failure = col_double(),
  ..   Techie = col_character(),
  ..   Contract = col_character(),
  ..   Port_modem = col_character(),
  ..   Tablet = col_character(),
  ..   InternetService = col_character(),
  ..   Phone = col_character(),
  ..   Multiple = col_character(),
  ..   OnlineSecurity = col_character(),
  ..   OnlineBackup = col_character(),
  ..   DeviceProtection = col_character(),
  ..   TechSupport = col_character(),
  ..   StreamingTV = col_character(),
  ..   StreamingMovies = col_character(),
  ..   PaperlessBilling = col_character(),
  ..   PaymentMethod = col_character(),
  ..   Tenure = col_double(),
  ..   MonthlyCharge = col_double(),
  ..   Bandwidth_GB_Year = col_double(),
  ..   item1 = col_double(),
  ..   item2 = col_double(),
  ..   item3 = col_double(),
  ..   item4 = col_double(),
  ..   item5 = col_double(),
  ..   item6 = col_double(),
  ..   item7 = col_double(),
  ..   item8 = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
#flag duplicated rows: prints one logical per row, TRUE where a row repeats an earlier one
#(sum(duplicated(df)) would condense this into a single count)
print(duplicated(df))
   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [277] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [289] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [301] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [313] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [325] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [337] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [349] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [361] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [373] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [385] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [397] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [409] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [421] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [433] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [445] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [457] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [469] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [481] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [493] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [505] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [517] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [529] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [541] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [553] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [565] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [577] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [589] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [601] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [613] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [625] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [637] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [649] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [661] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [673] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [685] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [697] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [709] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [721] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [733] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [745] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [757] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [769] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [781] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [793] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [805] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [817] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [829] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [841] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [853] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [865] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [877] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [889] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [901] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [913] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [925] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [937] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [949] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [961] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [973] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [985] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [997] FALSE FALSE FALSE FALSE
 [ reached getOption("max.print") -- omitted 9000 entries ]
#drop duplicated rows, keeping one copy of each distinct row
df <- df %>% distinct()
print(df)
#no duplicated values in dataset
#count the missing (NA) values in every column
df %>% is.na() %>% colSums()
                ...1            CaseOrder          Customer_id 
                   0                    0                    0 
         Interaction                 City                State 
                   0                    0                    0 
              County                  Zip                  Lat 
                   0                    0                    0 
                 Lng           Population                 Area 
                   0                    0                    0 
            Timezone                  Job             Children 
                   0                    0                 2495 
                 Age            Education           Employment 
                2475                    0                    0 
              Income              Marital               Gender 
                2490                    0                    0 
               Churn   Outage_sec_perweek                Email 
                   0                    0                    0 
            Contacts Yearly_equip_failure               Techie 
                   0                    0                 2477 
            Contract           Port_modem               Tablet 
                   0                    0                    0 
     InternetService                Phone             Multiple 
                   0                 1026                    0 
      OnlineSecurity         OnlineBackup     DeviceProtection 
                   0                    0                    0 
         TechSupport          StreamingTV      StreamingMovies 
                 991                    0                    0 
    PaperlessBilling        PaymentMethod               Tenure 
                   0                    0                  931 
       MonthlyCharge    Bandwidth_GB_Year                item1 
                   0                 1021                    0 
               item2                item3                item4 
                   0                    0                    0 
               item5                item6                item7 
                   0                    0                    0 
               item8 
                   0 
#plot where the missing values fall across the columns
visdat::vis_miss(df)

#Check each numeric column that contains null values
#(Children, Age, Income, Tenure, Bandwidth_GB_Year).
#Determine skew, possible outliers, and distribution before choosing how to impute.
#Children Histogram
hist(df$Children, 
     main = 'Children', 
     xlab = 'Number of Children', 
     border = 'blue', 
     col = 'green', 
     xlim = c(0, 10), 
     breaks = 10)

#Age Histogram
hist(df$Age)

#Income Histogram
hist(df$Income)

#Tenure Histogram 
hist(df$Tenure)

#Bandwidth_GB_Year Histogram
#(previously Tenure was plotted a second time here and this column was never inspected)
hist(df$Bandwidth_GB_Year)

#Children is right skewed, so its missing values are filled with the median.
df[["Children"]] <- replace(
  df[["Children"]],
  is.na(df[["Children"]]),
  median(df[["Children"]], na.rm = TRUE)
)

#confirm Children no longer reports missing values
df %>% is.na() %>% colSums()
                ...1            CaseOrder          Customer_id 
                   0                    0                    0 
         Interaction                 City                State 
                   0                    0                    0 
              County                  Zip                  Lat 
                   0                    0                    0 
                 Lng           Population                 Area 
                   0                    0                    0 
            Timezone                  Job             Children 
                   0                    0                    0 
                 Age            Education           Employment 
                2475                    0                    0 
              Income              Marital               Gender 
                2490                    0                    0 
               Churn   Outage_sec_perweek                Email 
                   0                    0                    0 
            Contacts Yearly_equip_failure               Techie 
                   0                    0                 2477 
            Contract           Port_modem               Tablet 
                   0                    0                    0 
     InternetService                Phone             Multiple 
                   0                 1026                    0 
      OnlineSecurity         OnlineBackup     DeviceProtection 
                   0                    0                    0 
         TechSupport          StreamingTV      StreamingMovies 
                 991                    0                    0 
    PaperlessBilling        PaymentMethod               Tenure 
                   0                    0                  931 
       MonthlyCharge    Bandwidth_GB_Year                item1 
                   0                 1021                    0 
               item2                item3                item4 
                   0                    0                    0 
               item5                item6                item7 
                   0                    0                    0 
               item8 
                   0 
#re-plot Children to see whether the median imputation affected its distribution
graphics::hist(df$Children)

#Age has a uniform distribution, so its missing values are filled with the mean.
df[["Age"]] <- replace(
  df[["Age"]],
  is.na(df[["Age"]]),
  mean(df[["Age"]], na.rm = TRUE)
)

#confirm Age no longer reports missing values
df %>% is.na() %>% colSums()
                ...1            CaseOrder          Customer_id 
                   0                    0                    0 
         Interaction                 City                State 
                   0                    0                    0 
              County                  Zip                  Lat 
                   0                    0                    0 
                 Lng           Population                 Area 
                   0                    0                    0 
            Timezone                  Job             Children 
                   0                    0                    0 
                 Age            Education           Employment 
                   0                    0                    0 
              Income              Marital               Gender 
                2490                    0                    0 
               Churn   Outage_sec_perweek                Email 
                   0                    0                    0 
            Contacts Yearly_equip_failure               Techie 
                   0                    0                 2477 
            Contract           Port_modem               Tablet 
                   0                    0                    0 
     InternetService                Phone             Multiple 
                   0                 1026                    0 
      OnlineSecurity         OnlineBackup     DeviceProtection 
                   0                    0                    0 
         TechSupport          StreamingTV      StreamingMovies 
                 991                    0                    0 
    PaperlessBilling        PaymentMethod               Tenure 
                   0                    0                  931 
       MonthlyCharge    Bandwidth_GB_Year                item1 
                   0                 1021                    0 
               item2                item3                item4 
                   0                    0                    0 
               item5                item6                item7 
                   0                    0                    0 
               item8 
                   0 
#re-plot Age to see whether the mean imputation affected its distribution
graphics::hist(df$Age)

#Income is right skewed, so its missing values are filled with the median.
df[["Income"]] <- replace(
  df[["Income"]],
  is.na(df[["Income"]]),
  median(df[["Income"]], na.rm = TRUE)
)

#confirm Income no longer reports missing values
df %>% is.na() %>% colSums()
                ...1            CaseOrder          Customer_id 
                   0                    0                    0 
         Interaction                 City                State 
                   0                    0                    0 
              County                  Zip                  Lat 
                   0                    0                    0 
                 Lng           Population                 Area 
                   0                    0                    0 
            Timezone                  Job             Children 
                   0                    0                    0 
                 Age            Education           Employment 
                   0                    0                    0 
              Income              Marital               Gender 
                   0                    0                    0 
               Churn   Outage_sec_perweek                Email 
                   0                    0                    0 
            Contacts Yearly_equip_failure               Techie 
                   0                    0                 2477 
            Contract           Port_modem               Tablet 
                   0                    0                    0 
     InternetService                Phone             Multiple 
                   0                 1026                    0 
      OnlineSecurity         OnlineBackup     DeviceProtection 
                   0                    0                    0 
         TechSupport          StreamingTV      StreamingMovies 
                 991                    0                    0 
    PaperlessBilling        PaymentMethod               Tenure 
                   0                    0                  931 
       MonthlyCharge    Bandwidth_GB_Year                item1 
                   0                 1021                    0 
               item2                item3                item4 
                   0                    0                    0 
               item5                item6                item7 
                   0                    0                    0 
               item8 
                   0 
#re-plot Income to see whether the median imputation affected its distribution
graphics::hist(df$Income)

#Tenure has a bimodal distribution, so the median was chosen to fill its missing values. (Middleton, 2022a)
df[["Tenure"]] <- replace(
  df[["Tenure"]],
  is.na(df[["Tenure"]]),
  median(df[["Tenure"]], na.rm = TRUE)
)

#confirm Tenure no longer reports missing values
df %>% is.na() %>% colSums()
                ...1            CaseOrder          Customer_id 
                   0                    0                    0 
         Interaction                 City                State 
                   0                    0                    0 
              County                  Zip                  Lat 
                   0                    0                    0 
                 Lng           Population                 Area 
                   0                    0                    0 
            Timezone                  Job             Children 
                   0                    0                    0 
                 Age            Education           Employment 
                   0                    0                    0 
              Income              Marital               Gender 
                   0                    0                    0 
               Churn   Outage_sec_perweek                Email 
                   0                    0                    0 
            Contacts Yearly_equip_failure               Techie 
                   0                    0                 2477 
            Contract           Port_modem               Tablet 
                   0                    0                    0 
     InternetService                Phone             Multiple 
                   0                 1026                    0 
      OnlineSecurity         OnlineBackup     DeviceProtection 
                   0                    0                    0 
         TechSupport          StreamingTV      StreamingMovies 
                 991                    0                    0 
    PaperlessBilling        PaymentMethod               Tenure 
                   0                    0                    0 
       MonthlyCharge    Bandwidth_GB_Year                item1 
                   0                 1021                    0 
               item2                item3                item4 
                   0                    0                    0 
               item5                item6                item7 
                   0                    0                    0 
               item8 
                   0 
#re-plot Tenure to see whether the median imputation affected its distribution
graphics::hist(df$Tenure)

#Bandwidth_GB_Year has a bimodal distribution, so the median was chosen to fill its missing values.
df[["Bandwidth_GB_Year"]] <- replace(
  df[["Bandwidth_GB_Year"]],
  is.na(df[["Bandwidth_GB_Year"]]),
  median(df[["Bandwidth_GB_Year"]], na.rm = TRUE)
)

#confirm Bandwidth_GB_Year no longer reports missing values
df %>% is.na() %>% colSums()
                ...1            CaseOrder          Customer_id 
                   0                    0                    0 
         Interaction                 City                State 
                   0                    0                    0 
              County                  Zip                  Lat 
                   0                    0                    0 
                 Lng           Population                 Area 
                   0                    0                    0 
            Timezone                  Job             Children 
                   0                    0                    0 
                 Age            Education           Employment 
                   0                    0                    0 
              Income              Marital               Gender 
                   0                    0                    0 
               Churn   Outage_sec_perweek                Email 
                   0                    0                    0 
            Contacts Yearly_equip_failure               Techie 
                   0                    0                 2477 
            Contract           Port_modem               Tablet 
                   0                    0                    0 
     InternetService                Phone             Multiple 
                   0                 1026                    0 
      OnlineSecurity         OnlineBackup     DeviceProtection 
                   0                    0                    0 
         TechSupport          StreamingTV      StreamingMovies 
                 991                    0                    0 
    PaperlessBilling        PaymentMethod               Tenure 
                   0                    0                    0 
       MonthlyCharge    Bandwidth_GB_Year                item1 
                   0                    0                    0 
               item2                item3                item4 
                   0                    0                    0 
               item5                item6                item7 
                   0                    0                    0 
               item8 
                   0 
#Check if the distribution of data was affected by the imputation of the median. 
hist(df$Bandwidth_GB_Year)

#Clean null values from object or text columns using the mode.
#Techie column is text, therefore I will use the mode (most frequent value) to impute the data.
#Base R's mode() returns the storage mode of a vector ("character" for this column),
#not the statistical mode, so the most frequent value is taken from a frequency table.
#table() ignores NA by default; which.max() keeps the first level if counts are tied.
techie_mode <- names(which.max(table(df$Techie)))
df$Techie[is.na(df$Techie)] <- techie_mode

#verify the data was imputed 
colSums(is.na(df))
                ...1            CaseOrder          Customer_id 
                   0                    0                    0 
         Interaction                 City                State 
                   0                    0                    0 
              County                  Zip                  Lat 
                   0                    0                    0 
                 Lng           Population                 Area 
                   0                    0                    0 
            Timezone                  Job             Children 
                   0                    0                    0 
                 Age            Education           Employment 
                   0                    0                    0 
              Income              Marital               Gender 
                   0                    0                    0 
               Churn   Outage_sec_perweek                Email 
                   0                    0                    0 
            Contacts Yearly_equip_failure               Techie 
                   0                    0                    0 
            Contract           Port_modem               Tablet 
                   0                    0                    0 
     InternetService                Phone             Multiple 
                   0                 1026                    0 
      OnlineSecurity         OnlineBackup     DeviceProtection 
                   0                    0                    0 
         TechSupport          StreamingTV      StreamingMovies 
                 991                    0                    0 
    PaperlessBilling        PaymentMethod               Tenure 
                   0                    0                    0 
       MonthlyCharge    Bandwidth_GB_Year                item1 
                   0                    0                    0 
               item2                item3                item4 
                   0                    0                    0 
               item5                item6                item7 
                   0                    0                    0 
               item8 
                   0 
#Phone column is text, therefore I will use the mode (most frequent value) to impute the data.
#Base R's mode() returns the storage mode of a vector ("character" for this column),
#not the statistical mode, so the most frequent value is taken from a frequency table.
#table() ignores NA by default; which.max() keeps the first level if counts are tied.
phone_mode <- names(which.max(table(df$Phone)))
df$Phone[is.na(df$Phone)] <- phone_mode

#verify the data was imputed 
colSums(is.na(df))
                ...1            CaseOrder          Customer_id 
                   0                    0                    0 
         Interaction                 City                State 
                   0                    0                    0 
              County                  Zip                  Lat 
                   0                    0                    0 
                 Lng           Population                 Area 
                   0                    0                    0 
            Timezone                  Job             Children 
                   0                    0                    0 
                 Age            Education           Employment 
                   0                    0                    0 
              Income              Marital               Gender 
                   0                    0                    0 
               Churn   Outage_sec_perweek                Email 
                   0                    0                    0 
            Contacts Yearly_equip_failure               Techie 
                   0                    0                    0 
            Contract           Port_modem               Tablet 
                   0                    0                    0 
     InternetService                Phone             Multiple 
                   0                    0                    0 
      OnlineSecurity         OnlineBackup     DeviceProtection 
                   0                    0                    0 
         TechSupport          StreamingTV      StreamingMovies 
                 991                    0                    0 
    PaperlessBilling        PaymentMethod               Tenure 
                   0                    0                    0 
       MonthlyCharge    Bandwidth_GB_Year                item1 
                   0                    0                    0 
               item2                item3                item4 
                   0                    0                    0 
               item5                item6                item7 
                   0                    0                    0 
               item8 
                   0 
#TechSupport column is text, therefore I will use the mode (most frequent value) to impute the data.
#Base R's mode() returns the storage type of its argument (e.g. "character"), not the
#most frequent value, so the statistical mode is taken from a frequency table instead.
#table() leaves NA out of the counts by default, so only observed answers are considered.
techsupport_mode <- names(which.max(table(df$TechSupport)))
df$TechSupport[is.na(df$TechSupport)] <- techsupport_mode

#verify the data was imputed 
colSums(is.na(df))
                ...1            CaseOrder          Customer_id 
                   0                    0                    0 
         Interaction                 City                State 
                   0                    0                    0 
              County                  Zip                  Lat 
                   0                    0                    0 
                 Lng           Population                 Area 
                   0                    0                    0 
            Timezone                  Job             Children 
                   0                    0                    0 
                 Age            Education           Employment 
                   0                    0                    0 
              Income              Marital               Gender 
                   0                    0                    0 
               Churn   Outage_sec_perweek                Email 
                   0                    0                    0 
            Contacts Yearly_equip_failure               Techie 
                   0                    0                    0 
            Contract           Port_modem               Tablet 
                   0                    0                    0 
     InternetService                Phone             Multiple 
                   0                    0                    0 
      OnlineSecurity         OnlineBackup     DeviceProtection 
                   0                    0                    0 
         TechSupport          StreamingTV      StreamingMovies 
                   0                    0                    0 
    PaperlessBilling        PaymentMethod               Tenure 
                   0                    0                    0 
       MonthlyCharge    Bandwidth_GB_Year                item1 
                   0                    0                    0 
               item2                item3                item4 
                   0                    0                    0 
               item5                item6                item7 
                   0                    0                    0 
               item8 
                   0 
#Boxplot of CaseOrder (drawn without a title).
b <- boxplot(df$CaseOrder)

#Using boxplots, check for outliers in each float or integer column.
#One plot is drawn per column, titled with the column name.
#b is overwritten on every pass, so it ends up holding the statistics of the last plot.
numeric_cols <- c(
  "Zip", "Lat", "Lng", "Population", "Children", "Age", "Income",
  "Outage_sec_perweek", "Email", "Contacts", "Yearly_equip_failure",
  "Tenure", "MonthlyCharge", "Bandwidth_GB_Year",
  "item1", "item2", "item3", "item4", "item5", "item6", "item7", "item8"
)
for (col_name in numeric_cols) {
  b <- boxplot(df[[col_name]], main = col_name)
}

#Outliers found in Lat, Lng, Population, Children, Income, Outage_sec_perweek, Email, Contacts, Yearly_equip_failure, MonthlyCharge, item1, item2, item3, item4, item5, item6, item7, & item8 columns. 
#Treating outliers: 
max(df$Lat)
[1] 70.64066
min(df$Lat)
[1] 17.96612
#Retain outliers in Lat (Including US territories, the min and max are within a valid range) (Bathman, 2018)
max(df$Lng)
[1] -65.66785
min(df$Lng)
[1] -171.6882
#Retain outliers in Lng (Including US territories, the min and max are within a valid range) (Bathman, 2018)
summary(df$Population)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0     738    2910    9757   13168  111850 
#Replace outlier population values >= 27,000 with 2931.
#New York City, NY, has the highest population density in the US; its maximum density is a little over 27,000 per square mile. Therefore the right-skewed outliers are treated as likely entry errors. (Planning-Population-NYC Population Facts - DCP, n.d.)
#NOTE(review): the cutoff is a density (people per square mile) while Population may be a head count - confirm the comparison is intended.
#NOTE(review): 2931 is typed in by hand and differs from the median printed by summary() above (2910) - confirm where it comes from.
df$Population[df$Population >= 27000] <- 2931
summary(df$Population)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0     738    2910    4949    5943   26999 
#Check distribution of population data. 
b <-boxplot(df$Population, main = 'Population')

#Check that the max income is within a reasonable range
max(df$Income)
[1] 258900.7
#Check that the max and min outage_sec_perweek is within a reasonable range
max(df$Outage_sec_perweek)
[1] 47.04928
min(df$Outage_sec_perweek)
[1] -1.348571
#Retain outliers in Children (All values are possible children values)
#Retain outliers in Income (All values are possible income values)
summary(df$Outage_sec_perweek)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -1.349   8.054  10.203  11.453  12.488  47.049 
#Replace negative values in Outage_sec_perweek with the median, because you cannot have less than zero seconds of outage.
#The median is computed from the valid (non-negative) observations instead of being typed in by hand,
#so it stays correct if the data or the earlier cleaning steps change.
outage_is_negative <- df$Outage_sec_perweek < 0
outage_median <- median(df$Outage_sec_perweek[!outage_is_negative], na.rm = TRUE)
df$Outage_sec_perweek[outage_is_negative] <- outage_median
#verify the replacement on the column that was just changed (the minimum should no longer be negative)
summary(df$Outage_sec_perweek)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      0     738    2910    4949    5943   26999 
b <-boxplot(df$Outage_sec_perweek, main = 'Outage_sec_perweek')

#Check that the max MonthlyCharge is within a reasonable range
max(df$MonthlyCharge)
[1] 315.8786
#Retain outliers in Email (All values are possible email values)
#Retain outliers in Contacts (All values are possible contact values)
#Retain outliers in Yearly_equip_failure (All values are possible equipment failure values)
#Retain outliers in MonthlyCharge (All values are possible monthly charge values)
#Retain outliers in all item answers (All values are possible values for each item answer)
#Re-expressing Categorical Variables (Middleton, 2022c)
#Practice label encoding on the yes/no (dichotomous) columns, starting with Churn. (By Great Learning Team -, 2022)
#The encoder object is stored in lbl so the same object can be reused for other columns.
lbl <- LabelEncoder$new()
df[["Churn"]] <- lbl$fit_transform(df[["Churn"]])
print(df[["Churn"]])
   [1] 0 1 0 0 1 0 1 1 0 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1
  [38] 0 0 0 0 0 1 0 1 1 1 1 0 0 1 1 0 1 1 0 1 0 1 0 0 0 1 0 0 0 1 1 0 1 1 1 0 1
  [75] 1 1 1 0 1 0 1 0 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 0 1 0 1 0 1 1 1 1 1 1 0
 [112] 1 1 0 1 0 0 0 1 1 0 1 0 0 0 0 1 0 0 1 1 0 1 1 0 0 0 0 1 1 0 1 0 0 1 1 1 0
 [149] 1 1 1 0 0 1 0 0 1 0 1 0 1 1 0 1 1 0 0 1 0 1 1 1 0 1 0 1 0 1 0 0 1 1 0 1 1
 [186] 0 0 1 0 0 0 1 1 0 1 1 0 0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1
 [223] 1 0 1 0 1 0 1 1 1 0 1 0 0 0 1 1 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0
 [260] 1 1 1 0 0 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 0 1 1 1 0 1 0 1 0 1 0 0 1 1 0
 [297] 0 0 0 1 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 1 0 1 0 1 1 0 1 1 1 1
 [334] 0 0 0 1 1 0 1 1 0 0 1 0 1 0 1 1 1 1 0 0 1 1 0 1 0 1 0 0 1 0 1 0 0 1 0 0 1
 [371] 0 0 0 0 1 1 0 1 0 1 0 1 0 0 1 0 0 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 0 1 0 0 0
 [408] 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 1 1 1 0 1 0 0 1 0 1 1
 [445] 0 1 1 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 1 1 0 0 1 0 0
 [482] 1 0 0 1 0 1 1 0 1 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 0 0 1 1 1 1 1 0
 [519] 0 1 0 1 0 0 1 1 1 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 1 1 1 0 1 0 1 1 1 0 1 1 0
 [556] 0 0 1 1 0 1 1 1 0 0 0 0 0 1 1 0 0 1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1
 [593] 1 0 1 1 1 0 0 0 0 1 0 0 0 1 0 1 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 0 0
 [630] 0 1 1 0 0 1 1 0 1 1 1 1 1 0 0 1 1 0 1 0 0 1 0 1 1 1 0 0 0 1 0 1 0 0 1 1 0
 [667] 0 0 0 0 1 1 0 1 1 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 1 1 1 1 1 0 0 1
 [704] 1 0 0 0 1 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 0 1 1 0 0 0 1 0 1 1 0 0 0 1 1 0 1
 [741] 0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0
 [778] 1 0 1 0 1 1 1 0 0 1 0 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 1 0 0 0 1 1 1 0 1 1
 [815] 0 0 1 0 0 1 1 1 1 0 1 1 1 0 0 1 0 1 1 0 1 1 0 0 0 0 1 0 1 1 0 0 0 1 1 0 1
 [852] 1 1 1 0 1 1 0 1 0 0 0 1 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 0 1 0 1 0 0 0 1
 [889] 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 0 1 0 1 1 0 0 1 0 1 1 0 0 1 0
 [926] 1 1 1 1 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 1 0 0 1 0
 [963] 0 1 0 1 1 1 0 0 0 0 1 1 1 0 1 1 0 1 0 1 0 1 0 1 1 1 1 0 1 0 1 0 1 1 1 0 0
[1000] 1
 [ reached getOption("max.print") -- omitted 9000 entries ]
#Label-encode the remaining yes/no columns with the existing lbl encoder object.
#fit_transform() is called once per column, in the order listed.
for (binary_col in c("Techie", "Port_modem", "Phone")) {
  df[[binary_col]] <- lbl$fit_transform(df[[binary_col]])
}
#Practice Ordinal Encoding (Middleton, 2022c)
#First find the unique entries in the Education column. 
print(unique(df$Education))
 [1] "Master's Degree"                         
 [2] "Regular High School Diploma"             
 [3] "Doctorate Degree"                        
 [4] "No Schooling Completed"                  
 [5] "Associate's Degree"                      
 [6] "Bachelor's Degree"                       
 [7] "Some College, Less than 1 Year"          
 [8] "GED or Alternative Credential"           
 [9] "Some College, 1 or More Years, No Degree"
[10] "9th Grade to 12th Grade, No Diploma"     
[11] "Nursery School to 8th Grade"             
[12] "Professional School Degree"              
print(length(unique(df$Education)))
[1] 12
#Map every education level to an ordinal rank (0 = lowest, 11 = highest),
#then store the ranks as a numeric column named Education_numeric.
#NOTE(review): "Professional School Degree" (8) is ranked below "Bachelor's Degree" (9) - confirm this ordering is intended.
edu.num <- revalue(
  x = df$Education,
  replace = c(
    "No Schooling Completed" = 0,
    "Nursery School to 8th Grade" = 1,
    "9th Grade to 12th Grade, No Diploma" = 2,
    "GED or Alternative Credential" = 3,
    "Regular High School Diploma" = 4,
    "Some College, Less than 1 Year" = 5,
    "Some College, 1 or More Years, No Degree" = 6,
    "Associate's Degree" = 7,
    "Professional School Degree" = 8,
    "Bachelor's Degree" = 9,
    "Master's Degree" = 10,
    "Doctorate Degree" = 11
  )
)
df$Education_numeric <- as.numeric(edu.num)
#Check that Education_numeric was created correctly (expect the 12 ranks 0 through 11).
unique(df$Education_numeric)
 [1] 10  4 11  0  7  9  5  3  6  2  1  8
#Principal Component Analysis
data.frame(colnames(df))
#Principal Component Analysis
#Select the PCA inputs by column name instead of by position, so the selection
#does not silently shift if columns are added, removed or reordered upstream.
#NOTE(review): Zip is a postal code stored as a number - confirm it should be treated as a continuous PCA input.
pca_columns <- c(
  "Zip", "Lat", "Lng", "Population", "Children", "Age", "Income",
  "Outage_sec_perweek", "Email", "Contacts", "Yearly_equip_failure",
  "Tenure", "MonthlyCharge", "Bandwidth_GB_Year",
  "item1", "item2", "item3", "item4", "item5", "item6", "item7", "item8"
)
features <- df[, pca_columns]
print(features)
#Run the PCA on the selected features, centering and scaling each column.
#The scaling argument of prcomp() is named scale. (with a trailing dot); it is spelled out
#in full here rather than relying on partial argument matching of "scale".
df.pca <- prcomp(features, center = TRUE, scale. = TRUE)
#Loadings: one row per input variable, one column per principal component.
df.pca$rotation
                               PC1           PC2           PC3          PC4
Zip                   0.0188571537 -0.6401960693  0.2745147405  0.044817773
Lat                   0.0015986087 -0.0630604684  0.0065279851  0.007661462
Lng                  -0.0168198338  0.6456309318 -0.2749570864 -0.043551367
Population           -0.0002513100  0.0965502290 -0.0529230067 -0.014276309
Children             -0.0007364960  0.0265449717 -0.0127067734  0.007702000
Age                  -0.0050703280 -0.0040857303 -0.0126322662 -0.017399437
Income                0.0008734781  0.0022717013  0.0073997800  0.024225479
Outage_sec_perweek    0.0137353655 -0.0001948383  0.0192716284 -0.047172817
Email                -0.0088188377  0.0043371779 -0.0244117269 -0.006065079
Contacts              0.0084514444  0.0103492406 -0.0004317492 -0.011705395
Yearly_equip_failure  0.0078611636 -0.0048470968  0.0197746873  0.008368691
Tenure                0.0105528765  0.2718806419  0.6462878770 -0.069427075
MonthlyCharge         0.0004178732  0.0281393263  0.0348137800 -0.025584901
Bandwidth_GB_Year     0.0124697625  0.2723041111  0.6479502878 -0.071668858
item1                -0.4589214311  0.0283580145  0.0217145869  0.279398233
item2                -0.4339234191  0.0161364593  0.0388539635  0.282013824
item3                -0.4008361003  0.0256498788  0.0259608159  0.280485297
item4                -0.1454553593 -0.0538598694 -0.0300107138 -0.565912750
item5                 0.1753227142  0.0584312697  0.0453129572  0.585224442
item6                -0.4046633995 -0.0362809058  0.0028431418 -0.181950796
item7                -0.3580687534 -0.0214747589  0.0056640371 -0.179712058
item8                -0.3086035826 -0.0214406866 -0.0085830524 -0.131865011
                              PC5          PC6          PC7           PC8
Zip                   0.106838112 -0.071943611 -0.031718948 -0.0004918139
Lat                  -0.604092714  0.349158401  0.176803170 -0.0123467531
Lng                  -0.022622852  0.021301121  0.005119094 -0.0023274569
Population            0.535630824 -0.363897908 -0.109653344 -0.1422587510
Children             -0.165598608  0.099441337 -0.558512211  0.0662076838
Age                   0.072020765  0.004859611  0.490109311 -0.3405078047
Income               -0.051800439  0.034237938 -0.167306173 -0.4106327753
Outage_sec_perweek   -0.404985165 -0.575669020 -0.041002499 -0.0218139633
Email                 0.088798643 -0.126891598  0.108678091  0.6562666245
Contacts              0.045884979 -0.025840089  0.525484143 -0.1569462869
Yearly_equip_failure -0.043414321 -0.034792506 -0.276182910 -0.4799535253
Tenure                0.030263895  0.048458906  0.017542333  0.0114757926
MonthlyCharge        -0.348089364 -0.606043975  0.084388516 -0.0109985176
Bandwidth_GB_Year     0.001008189  0.009490080 -0.005957515  0.0153956264
item1                -0.027406246 -0.020132836 -0.001442801 -0.0063828301
item2                -0.013106446 -0.014528708  0.020744519 -0.0102293745
item3                -0.005955447  0.017962674  0.006202985  0.0371669811
item4                -0.017172468  0.049138274  0.012216257  0.0076772156
item5                -0.005891339 -0.027717791  0.037297888 -0.0021468644
item6                 0.013371009 -0.016879375 -0.008886825 -0.0041392987
item7                -0.001126029  0.039892820 -0.009188857 -0.0050021568
item8                 0.038893665 -0.062066492 -0.002840167 -0.0488327490
                               PC9          PC10         PC11         PC12
Zip                   0.0055109415  0.0221390539  0.009438189  0.033803067
Lat                   0.0581594889 -0.1097768024 -0.031036062 -0.105859420
Lng                  -0.0179312580 -0.0008572831 -0.001855791 -0.015524618
Population            0.1341556756 -0.1122017062  0.011749835  0.049976673
Children             -0.0205626410  0.4926453257  0.110650090  0.606937815
Age                  -0.2540765071  0.3580634821 -0.578105966  0.305043406
Income                0.7017492092  0.3622854173 -0.194644508 -0.344725352
Outage_sec_perweek    0.0137585505  0.0067931275  0.025849450 -0.068991878
Email                -0.0833625663  0.5638131180 -0.116309932 -0.403100609
Contacts              0.1353550498  0.3220837334  0.745542585  0.118774412
Yearly_equip_failure -0.6257293487  0.2089272189  0.189844040 -0.445385778
Tenure               -0.0025325675  0.0070834454 -0.020093179 -0.011219673
MonthlyCharge         0.0220788308 -0.0390286833 -0.062021370  0.149885637
Bandwidth_GB_Year     0.0046265327  0.0038654878  0.001363421  0.008910929
item1                -0.0172336042 -0.0037040449  0.020718797 -0.001922596
item2                 0.0016762503 -0.0003780297  0.005721930  0.012343377
item3                -0.0227616807 -0.0190900159 -0.008295027 -0.015952850
item4                -0.0174267533 -0.0266739621  0.003271266 -0.017267919
item5                -0.0061626124 -0.0097481196 -0.005244191  0.008040304
item6                 0.0019448269  0.0224055595  0.015233095 -0.005862493
item7                 0.0465036490  0.0428836523  0.030152756 -0.012092638
item8                -0.0003514868 -0.0476998738 -0.049361376  0.036070714
                             PC13         PC14         PC15         PC16
Zip                   0.002398198 -0.029410986 -0.012341937 -0.006171460
Lat                  -0.200539009  0.623343169  0.088236550 -0.006956204
Lng                   0.029222817 -0.084953496 -0.002360124  0.006683356
Population           -0.170707780  0.685233124 -0.030816956 -0.028835015
Children             -0.019747722  0.133306417  0.026084326 -0.038932222
Age                   0.092018620  0.069357260 -0.061502206  0.009482201
Income               -0.039235083 -0.114781714  0.010126312 -0.058475688
Outage_sec_perweek    0.671522878  0.164355291 -0.123329097  0.011729552
Email                -0.087028745  0.134842637  0.072635999 -0.017544960
Contacts              0.013622041  0.007555721  0.037937409 -0.036263403
Yearly_equip_failure -0.132347740  0.027762683  0.033070138  0.004632331
Tenure                0.031470595  0.027990666  0.001781114 -0.010329713
MonthlyCharge        -0.647093332 -0.220846643  0.051143892  0.010318362
Bandwidth_GB_Year    -0.015457712  0.012860605  0.013654264  0.001989609
item1                -0.010487739  0.008139223 -0.068894261 -0.118697523
item2                -0.006639538  0.020223318 -0.110396306 -0.171608143
item3                -0.007350790 -0.025935685 -0.174121213 -0.246669351
item4                -0.020931980 -0.024711910 -0.170593451 -0.476036125
item5                 0.045398292 -0.002254260  0.130339171  0.062747564
item6                -0.005335058  0.010159585 -0.066314788  0.058500232
item7                -0.032120564  0.013566201 -0.166240908  0.808303676
item8                 0.139432446 -0.016272987  0.913969888 -0.017445557
                             PC17          PC18         PC19          PC20
Zip                   0.012354529 -0.0028835422 -0.003716220 -0.0106574467
Lat                  -0.017976554 -0.0056537104  0.020434011  0.0048945311
Lng                  -0.009532477  0.0113528349 -0.009383339  0.0093667752
Population            0.035797203  0.0019795300  0.030228291 -0.0036413349
Children              0.019900701  0.0117316494  0.020451832  0.0076524120
Age                   0.001340730 -0.0123285066  0.008819602 -0.0160587167
Income                0.004632592  0.0006697662  0.013246817 -0.0048271121
Outage_sec_perweek    0.014749039 -0.0180871530  0.010967979  0.0041225283
Email                 0.016182672  0.0066287580 -0.015666053 -0.0010241338
Contacts              0.003887364 -0.0262335392  0.020794425  0.0006749441
Yearly_equip_failure  0.014258890 -0.0007018728  0.007604732  0.0212155127
Tenure               -0.007512632 -0.0121864765  0.006811292 -0.0050173963
MonthlyCharge         0.013137318  0.0009303910  0.020554916  0.0128927114
Bandwidth_GB_Year    -0.003354044 -0.0016069488 -0.007559956 -0.0072408162
item1                 0.048264506  0.0242272388 -0.239130884 -0.7930855463
item2                 0.070960834  0.0683243429 -0.590745884  0.5731520773
item3                 0.145692701 -0.3923196033  0.676577418  0.1769152875
item4                 0.446844285  0.4313373281  0.087901118 -0.0181018442
item5                 0.208115585  0.6946548884  0.261128981  0.0427600114
item6                -0.757664109  0.4049907481  0.224135917  0.0649766985
item7                 0.373344791  0.0679184919  0.065585465  0.0410352738
item8                 0.108536044 -0.0439036435  0.046246023  0.0431315864
                              PC21          PC22
Zip                  -0.0387970156  0.7001591779
Lat                  -0.0076029404  0.1122678682
Lng                  -0.0405912949  0.7026032290
Population           -0.0009509239  0.0136809476
Children              0.0183468106 -0.0004750638
Age                  -0.0215670835 -0.0021850116
Income               -0.0012984233 -0.0017340651
Outage_sec_perweek   -0.0007812121  0.0006235514
Email                -0.0056071592  0.0032365772
Contacts              0.0027855459 -0.0021232365
Yearly_equip_failure  0.0025489770 -0.0029759712
Tenure                0.7037863485  0.0411839706
MonthlyCharge         0.0483011784 -0.0002902270
Bandwidth_GB_Year    -0.7056788669 -0.0392040654
item1                 0.0030667994 -0.0018051388
item2                 0.0032203848 -0.0038689675
item3                -0.0148755572  0.0058078082
item4                -0.0016189777 -0.0015166223
item5                 0.0030792648 -0.0027467652
item6                -0.0014184860  0.0003596890
item7                 0.0066637807  0.0015055095
item8                 0.0028672522  0.0022335072
fviz_eig(df.pca, choice = "eigenvalue", addlabels = TRUE)

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKLS0tCmBgYHtyfQojaW1wb3J0IGxpYnJhcmllcyAKbGlicmFyeShkcGx5cikKbGlicmFyeShyZWFkcikKbGlicmFyeSAodmlzZGF0KQpsaWJyYXJ5KHN1cGVybWwpCmxpYnJhcnkocGx5cikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZmFjdG9leHRyYSkKCiNpbXBvcnQgZGF0YSBmaWxlICYgcHJldmlldyBkYXRhIApkZiA8LSByZWFkX2NzdignL1VzZXJzL0FtYW5kYS5IYXJ0emxlci9EZXNrdG9wL0RhdGFfQW5hbHl0aWNzX01hc3RlcnMvRDIwNi9jaHVybl9yYXdfZGF0YS5jc3YnKQpoZWFkKGRmKQpgYGAKYGBge3J9CiNkZXRlcm1pbmUgY29sdW1uIG5hbWVzLCBub24tbnVsbCB2YWx1ZXMsICYgdHlwZXMgCnN0cihkZikKYGBgCmBgYHtyfQojZGV0ZXJtaW5lIGlmIGFueSByb3dzIGFyZSBkdXBsaWNhdGVkIApkdXBsaWNhdGVkKGRmKQpgYGAKYGBge3J9CiNkZWxldGUgYW55IGR1cGxpY2F0ZWQgcm93cwpkZiA8LSBkaXN0aW5jdChkZikKcHJpbnQoZGYpCmBgYApgYGB7cn0KI25vIGR1cGxpY2F0ZWQgdmFsdWVzIGluIGRhdGFzZXQKI2RldGVybWluZSB3aGljaCB2YXJpYWJsZXMgY29udGFpbiBudWxsIHZhbHVlcyAmIGhvdyBtYW55IG51bGwgdmFsdWVzCiBjb2xTdW1zKGlzLm5hKGRmKSkKYGBgCmBgYHtyfQojdmlzdWFsaXphdGlvbiBvZiBtaXNzaW5nIGRhdGEKdmlzX21pc3MoZGYpCmBgYApgYGB7cn0KI0NoZWNrIGVhY2ggY29sdW1uIGNvbnRhaW5pbmcgbnVsbCB2YWx1ZXMgdGhhdCBpcyBhIGZsb2F0IG9yIGludGVnZXIgdmFsdWUuIAojRGV0ZXJtaW5lIHNrZXcsIHBvc3NpYmxlIG91bGllcnMsIGFuZCBkaXN0cmlidXRpb24uIAojQ2hpbGRyZW4gSGlzdG9ncmFtCmhpc3QoZGYkQ2hpbGRyZW4sIAogICAgIG1haW4gPSAnQ2hpbGRyZW4nLCAKICAgICB4bGFiID0gJ051bWJlciBvZiBDaGlsZHJlbicsIAogICAgIGJvcmRlciA9ICdibHVlJywgCiAgICAgY29sID0gJ2dyZWVuJywgCiAgICAgeGxpbSA9IGMoMCwgMTApLCAKICAgICBicmVha3MgPSAxMCkKYGBgCmBgYHtyfQojQWdlIEhpc3RvZ3JhbQpoaXN0KGRmJEFnZSkKYGBgCmBgYHtyfQojSW5jb21lIEhpc3RvZ3JhbQpoaXN0KGRmJEluY29tZSkKYGBgCmBgYHtyfQojVGVudXJlIEhpc3RvZ3JhbSAKaGlzdChkZiRUZW51cmUpCmBgYApgYGB7cn0KaGlzdChkZiRUZW51cmUpCmBgYApgYGB7cn0KI0NoaWxkcmVuIGNvbHVtbiBpcyByaWdodCBza2V3ZWQsIHRoZXJlZm9yZSBJIHdpbGwgdXNlIHRoZSBtZWRpYW4gdG8gaW1wdXRlIHRoZSBkYXRhLiAKZGYkQ2hpbGRyZW5baXMubmEoZGYkQ2hpbGRyZW4pXTwtbWVkaWFuKGRmJENoaWxkcmVuLG5hLnJtPVRSVUUpCgojdmVyaWZ5IHRoZSBkYXRhIHdhcyBpbXB1dGVkIApjb2xTdW1zKGlzLm5hKGRmKSkKCmBgYApgYGB7cn0KI0NoZWNrIGlmIHRoZSBkaXN0cmlidXRpb24gb2YgZGF0
YSB3YXMgZWZmZWN0ZWQgYnkgdGhlIGltcHV0YXRpb24gb2YgdGhlIG1lZGlhbi4gCmhpc3QoZGYkQ2hpbGRyZW4pCmBgYApgYGB7cn0KI0FnZSBjb2x1bW4gaGFzIGEgdW5pZm9ybSBkaXN0cmlidXRpb24sIHRoZXJlZm9yZSBJIHdpbGwgdXNlIHRoZSBtZWFuIHRvIGltcHV0ZSB0aGUgZGF0YS4KZGYkQWdlW2lzLm5hKGRmJEFnZSldPC1tZWFuKGRmJEFnZSxuYS5ybT1UUlVFKQoKI3ZlcmlmeSB0aGUgZGF0YSB3YXMgaW1wdXRlZCAKY29sU3Vtcyhpcy5uYShkZikpCmBgYApgYGB7cn0KI0NoZWNrIGlmIHRoZSBkaXN0cmlidXRpb24gb2YgZGF0YSB3YXMgZWZmZWN0ZWQgYnkgdGhlIGltcHV0YXRpb24gb2YgdGhlIG1lYW4uIApoaXN0KGRmJEFnZSkKYGBgCmBgYHtyfQojSW5jb21lIGNvbHVtbiBoYXMgaXMgcmlnaHQgc2tld2VkLCB0aGVyZWZvcmUgSSB3aWxsIHVzZSB0aGUgbWVkaWFuIHRvIGltcHV0ZSB0aGUgZGF0YS4KZGYkSW5jb21lW2lzLm5hKGRmJEluY29tZSldPC1tZWRpYW4oZGYkSW5jb21lLG5hLnJtPVRSVUUpCgojdmVyaWZ5IHRoZSBkYXRhIHdhcyBpbXB1dGVkIApjb2xTdW1zKGlzLm5hKGRmKSkKYGBgCmBgYHtyfQojQ2hlY2sgaWYgdGhlIGRpc3RyaWJ1dGlvbiBvZiBkYXRhIHdhcyBlZmZlY3RlZCBieSB0aGUgaW1wdXRhdGlvbiBvZiB0aGUgbWVkaWFuLiAKaGlzdChkZiRJbmNvbWUpCmBgYApgYGB7cn0KI1RlbnVyZSBjb2x1bW4gaGFzIGEgYmltb2RhbCBkaXN0cmlidXRpb24sIHRoZXJlZm9yZSBJIGRlY2lkZWQgdG8gdXNlIHRoZSBtZWRpYW4gdG8gaW1wdXRlIHRoZSBkYXRhLiAoTWlkZGxldG9uLCAyMDIyYSkgIApkZiRUZW51cmVbaXMubmEoZGYkVGVudXJlKV08LW1lZGlhbihkZiRUZW51cmUsbmEucm09VFJVRSkKCiN2ZXJpZnkgdGhlIGRhdGEgd2FzIGltcHV0ZWQgCmNvbFN1bXMoaXMubmEoZGYpKQpgYGAKYGBge3J9CiNDaGVjayBpZiB0aGUgZGlzdHJpYnV0aW9uIG9mIGRhdGEgd2FzIGVmZmVjdGVkIGJ5IHRoZSBpbXB1dGF0aW9uIG9mIHRoZSBtZWFuLiAKaGlzdChkZiRUZW51cmUpCmBgYApgYGB7cn0KI0JhbmR3aWR0aF9HQl9ZZWFyIGNvbHVtbiBoYXMgYSBiaW1vZGFsIGRpc3RyaWJ1dGlvbiwgdGhlcmVmb3JlIEkgZGVjaWRlZCB0byB1c2UgdGhlIG1lZGlhbiB0byBpbXB1dGUgdGhlIGRhdGEuCmRmJEJhbmR3aWR0aF9HQl9ZZWFyW2lzLm5hKGRmJEJhbmR3aWR0aF9HQl9ZZWFyKV08LW1lZGlhbihkZiRCYW5kd2lkdGhfR0JfWWVhcixuYS5ybT1UUlVFKQoKI3ZlcmlmeSB0aGUgZGF0YSB3YXMgaW1wdXRlZCAKY29sU3Vtcyhpcy5uYShkZikpCmBgYApgYGB7cn0KI0NoZWNrIGlmIHRoZSBkaXN0cmlidXRpb24gb2YgZGF0YSB3YXMgZWZmZWN0ZWQgYnkgdGhlIGltcHV0YXRpb24gb2YgdGhlIG1lYW4uIApoaXN0KGRmJEJhbmR3aWR0aF9HQl9ZZWFyKQpgYGAKYGBge3J9CiNDbGVhbiBudWxsIHZhbHVlcyBmcm9tIG9iamVjdCBvciB0ZXh0IGNvbHVtbnMgdXNpbmcgdGhlIG1vZGUuCiNUZWNoaWUgY29s
dW1uIGlzIHRleHQsIHRoZXJlZm9yZSBJIHdpbGwgdXNlIHRoZSBtb2RlIHRvIGltcHV0ZSB0aGUgZGF0YS4KZGYkVGVjaGllW2lzLm5hKGRmJFRlY2hpZSldPC1tb2RlKGRmJFRlY2hpZSkKCiN2ZXJpZnkgdGhlIGRhdGEgd2FzIGltcHV0ZWQgCmNvbFN1bXMoaXMubmEoZGYpKQpgYGAKYGBge3J9CiNQaG9uZSBjb2x1bW4gaXMgdGV4dCwgdGhlcmVmb3JlIEkgd2lsbCB1c2UgdGhlIG1vZGUgdG8gaW1wdXRlIHRoZSBkYXRhLgpkZiRQaG9uZVtpcy5uYShkZiRQaG9uZSldPC1tb2RlKGRmJFBob25lKQoKI3ZlcmlmeSB0aGUgZGF0YSB3YXMgaW1wdXRlZCAKY29sU3Vtcyhpcy5uYShkZikpCmBgYApgYGB7cn0KI1RlY2hTdXBwb3J0IGNvbHVtbiBpcyB0ZXh0LCB0aGVyZWZvcmUgSSB3aWxsIHVzZSB0aGUgbW9kZSB0byBpbXB1dGUgdGhlIGRhdGEuCmRmJFRlY2hTdXBwb3J0W2lzLm5hKGRmJFRlY2hTdXBwb3J0KV08LW1vZGUoZGYkVGVjaFN1cHBvcnQpCgojdmVyaWZ5IHRoZSBkYXRhIHdhcyBpbXB1dGVkIApjb2xTdW1zKGlzLm5hKGRmKSkKYGBgCmBgYHtyfQpgYGAKCgpgYGB7cn0KYiA8LWJveHBsb3QoZGYkQ2FzZU9yZGVyKQpgYGAKYGBge3J9CiNVc2luZyBCb3hwbG90cywgY2hlY2sgZm9yIG91dGxpZXJzIGluIGVhY2ggaW4gZWFjaCBmbG9hdCBvciBpbnRlZ2VyIHZhbHVlIGNvbHVtbi4gCmIgPC1ib3hwbG90KGRmJFppcCwgbWFpbiA9ICdaaXAnKQpiIDwtYm94cGxvdChkZiRMYXQsIG1haW4gPSAnTGF0JykKYiA8LWJveHBsb3QoZGYkTG5nLCBtYWluID0gJ0xuZycpCmIgPC1ib3hwbG90KGRmJFBvcHVsYXRpb24sIG1haW4gPSAnUG9wdWxhdGlvbicpCmIgPC1ib3hwbG90KGRmJENoaWxkcmVuLCBtYWluID0gJ0NoaWxkcmVuJykKYiA8LWJveHBsb3QoZGYkQWdlLCBtYWluID0gJ0FnZScpCmIgPC1ib3hwbG90KGRmJEluY29tZSwgbWFpbiA9ICdJbmNvbWUnKQpiIDwtYm94cGxvdChkZiRPdXRhZ2Vfc2VjX3BlcndlZWssIG1haW4gPSAnT3V0YWdlX3NlY19wZXJ3ZWVrJykKYiA8LWJveHBsb3QoZGYkRW1haWwsIG1haW4gPSAnRW1haWwnKQpiIDwtYm94cGxvdChkZiRDb250YWN0cywgbWFpbiA9ICdDb250YWN0cycpCmIgPC1ib3hwbG90KGRmJFllYXJseV9lcXVpcF9mYWlsdXJlLCBtYWluID0gJ1llYXJseV9lcXVpcF9mYWlsdXJlJykKYiA8LWJveHBsb3QoZGYkVGVudXJlLCBtYWluID0gJ1RlbnVyZScpCmIgPC1ib3hwbG90KGRmJE1vbnRobHlDaGFyZ2UsIG1haW4gPSAnTW9udGhseUNoYXJnZScpCmIgPC1ib3hwbG90KGRmJEJhbmR3aWR0aF9HQl9ZZWFyLCBtYWluID0gJ0JhbmR3aWR0aF9HQl9ZZWFyJykKYiA8LWJveHBsb3QoZGYkaXRlbTEsIG1haW4gPSAnaXRlbTEnKQpiIDwtYm94cGxvdChkZiRpdGVtMiwgbWFpbiA9ICdpdGVtMicpCmIgPC1ib3hwbG90KGRmJGl0ZW0zLCBtYWluID0gJ2l0ZW0zJykKYiA8LWJveHBsb3QoZGYkaXRlbTQsIG1haW4gPSAnaXRlbTQnKQpiIDwtYm94cGxvdChkZiRpdGVtNSwgbWFpbiA9ICdp
dGVtNScpCmIgPC1ib3hwbG90KGRmJGl0ZW02LCBtYWluID0gJ2l0ZW02JykKYiA8LWJveHBsb3QoZGYkaXRlbTcsIG1haW4gPSAnaXRlbTcnKQpiIDwtYm94cGxvdChkZiRpdGVtOCwgbWFpbiA9ICdpdGVtOCcpCmBgYApgYGB7cn0KI091dGxpZXJzIGZvdW5kIGluIExhdCwgTG5nLCBQb3B1bGF0aW9uLCBDaGlsZHJlbiwgSW5jb21lLCBPdXRhZ2Vfc2VjX3BlcndlZWssIEVtYWlsLCBDb250YWN0cywgWWVhcmx5X2VxdWlwX2ZhaWx1cmUsIE1vbnRobHlDaGFyZ2UsIGl0ZW0xLCBpdGVtMiwgaXRlbTMsIGl0ZW00LCBpdGVtNSwgaXRlbTYsIGl0ZW03LCAmIGl0ZW04IGNvbHVtbnMuIAojVHJlYXRpbmcgb3V0bGllcnM6IAptYXgoZGYkTGF0KQptaW4oZGYkTGF0KQpgYGAKYGBge3J9CiNSZXRhaW4gb3V0bGllcnMgaW4gTGF0IChJbmN1ZGluZyBVUyB0ZXJyaXRvcmllcywgdGhlIG1pbiBhbmQgbWF4IGFyZSB3aXRoaW4gYSB2YWxpZCByYW5nZSkgKEJhdGhtYW4sIDIwMTgpCm1heChkZiRMbmcpCm1pbihkZiRMbmcpCmBgYApgYGB7cn0KI1JldGFpbiBvdXRsaWVycyBpbiBMbmcgKEluY3VkaW5nIFVTIHRlcnJpdG9yaWVzLCB0aGUgbWluIGFuZCBtYXggYXJlIHdpdGhpbiBhIHZhbGlkIHJhbmdlKSAoQmF0aG1hbiwgMjAxOCkKc3VtbWFyeShkZiRQb3B1bGF0aW9uKQpgYGAKYGBge3J9CiNSZXBsYWNlIG91dGxpZXIgcG9wdWxhdGlvbiB2YWx1ZXMgPiAyNywwMDAgCiNOZXcgWW9yayBDaXR5LCBOWSwgaGFzIHRoZSBtb3N0IGRlbnNpdHkgcG9wdWxhdGlvbiBpbiB0aGUgVVMuIEluIE5ldyBZb3JrIHRoZSBtYXggcG9wdWxhdGlvbiBkZW5zaXR5IGlzIGEgbGl0dGxlIG92ZXIgMjcsMDAwIHBlciBzcXVhcmUgbWlsZS4gVGhlcmVmb3JlIHRoZSByaWdodCBza2V3ZWQgb3V0bGllcnMgYXJlIGxpa2VseSBlbnRyeSBlcnJvcnMuIChQbGFubmluZy1Qb3B1bGF0aW9uLU5ZQyBQb3B1bGF0aW9uIEZhY3RzIC0gRENQLCBuLmQuKQpkZlsiUG9wdWxhdGlvbiJdW2RmWyJQb3B1bGF0aW9uIl0gPj0gMjcwMDBdIDwtIDI5MzEKc3VtbWFyeShkZiRQb3B1bGF0aW9uKQpgYGAKYGBge3J9CiNDaGVjayBkaXN0cmlidXRpb24gb2YgcG9wdWxhdGlvbiBkYXRhLiAKYiA8LWJveHBsb3QoZGYkUG9wdWxhdGlvbiwgbWFpbiA9ICdQb3B1bGF0aW9uJykKYGBgCmBgYHtyfQojQ2hlY2sgdGhhdCB0aGUgbWF4IGluY29tZSBpcyB3aXRoaW4gYSByZWFzb25hYmxlIHJhbmdlCm1heChkZiRJbmNvbWUpCgojQ2hlY2sgdGhhdCB0aGUgbWF4IGFuZCBtaW4gb3V0YWdlX3NlY19wZXJ3ZWVrIGlzIHdpdGhpbiBhIHJlYXNvbmFibGUgcmFuZ2UKbWF4KGRmJE91dGFnZV9zZWNfcGVyd2VlaykKbWluKGRmJE91dGFnZV9zZWNfcGVyd2VlaykKYGBgCmBgYHtyfQojUmV0YWluIG91dGxpZXJzIGluIENoaWxkcmVuIChBbGwgdmFsdWVzIGFyZSBwb3NzaWJsZSBjaGlsZHJlbiB2YWx1ZXMpCiNSZXRhaW4gb3V0bGllcnMgaW4gSW5jb21lIChBbGwgdmFsdWVzIGFyZSBwb3NzaWJsZSBp
bmNvbWUgdmFsdWVzKQpzdW1tYXJ5KGRmJE91dGFnZV9zZWNfcGVyd2VlaykKYGBgCmBgYHtyfQojUmVwbGFjZSBuZWdhdGl2ZSBvdXRsaWVycyBpbiBPdXRhZ2Vfc2VjX3BlcndlZWsgd2l0aCBtZWRpYW4gYmVjYXVzZSB5b3UgY2Fubm90IGhhdmUgbGVzcyB0aGFuIHplcm8gc2Vjb25zIG9mIG91dGFnZQpkZiRPdXRhZ2Vfc2VjX3BlcndlZWtbZGYkT3V0YWdlX3NlY19wZXJ3ZWVrIDwwXSA8LSAxMC4yMTQyMzEKc3VtbWFyeShkZiRQb3B1bGF0aW9uKQpgYGAKYGBge3J9CmIgPC1ib3hwbG90KGRmJE91dGFnZV9zZWNfcGVyd2VlaywgbWFpbiA9ICdPdXRhZ2Vfc2VjX3BlcndlZWsnKQpgYGAKYGBge3J9CiNDaGVjayB0aGF0IHRoZSBNb250aGx5Q2hhcmdlIGluY29tZSBpcyB3aXRoaW4gYSByZWFzb25hYmxlIHJhbmdlCm1heChkZiRNb250aGx5Q2hhcmdlKQpgYGAKYGBge3J9CiNSZXRhaW4gb3V0bGllcnMgaW4gRW1haWwgKEFsbCB2YWx1ZXMgYXJlIHBvc3NpYmxlIGVtYWlsIHZhbHVlcykKI1JldGFpbiBvdXRsaWVycyBpbiBDb250YWN0cyAoQWxsIHZhbHVlcyBhcmUgcG9zc2libGUgY29udGFjdCB2YWx1ZXMpCiNSZXRhaW4gb3V0bGllcnMgaW4gWWVhcmx5X2VxdWlwX2ZhaWx1cmUgKEFsbCB2YWx1ZXMgYXJlIHBvc3NpYmxlIGVxdWlwbWVudCBmYWlsdXJlIHZhbHVlcykKI1JldGFpbiBvdXRsaWVycyBpbiBNb250aGx5Q2hhcmdlIChBbGwgdmFsdWVzIGFyZSBwb3NzaWJsZSBtb250aGx5IGNoYXJnZSB2YWx1ZXMpCiNSZXRhaW4gb3V0bGllcnMgaW4gYWxsIGl0ZW0gYW5zd2VycyAoQWxsIHZhbHVlcyBhcmUgcG9zc2libGUgdmFsdWVzIGZvciBlYWNoIGl0ZW0gYW5zd2VyKQpgYGAKYGBge3J9CiNSZS1leHByZXNzaW5nIENhdGVnb3JpY2FsIFZhcmlhYmxlcyAoTWlkZGxldG9uLCAyMDIyYykKI1ByYWN0aWNlIGxhYmVsIGVuY29kaW5nIHllcy9ubyBkaWNob3RvbW91cyBiaW5hcnkgY29sdW1ucy4gKEJ5IEdyZWF0IExlYXJuaW5nIFRlYW0gLSwgMjAyMikKbGJsID0gTGFiZWxFbmNvZGVyJG5ldygpCmRmJENodXJuID0gbGJsJGZpdF90cmFuc2Zvcm0oZGYkQ2h1cm4pCnByaW50KGRmJENodXJuKQpgYGAKYGBge3J9CmRmJFRlY2hpZSA9IGxibCRmaXRfdHJhbnNmb3JtKGRmJFRlY2hpZSkKZGYkUG9ydF9tb2RlbSA9IGxibCRmaXRfdHJhbnNmb3JtKGRmJFBvcnRfbW9kZW0pCmRmJFBob25lID0gbGJsJGZpdF90cmFuc2Zvcm0oZGYkUGhvbmUpCmBgYApgYGB7cn0KI1ByYWN0aWNlIE9yZGluYWwgRW5jb2RpbmcgKE1pZGRsZXRvbiwgMjAyMmMpCiNGaXJzdCBmaW5kIHVuaXF1ZSBlbnRpcmllcyBpbiB0aGUgZWR1Y2F0aW9uIHRhYmxlLiAKcHJpbnQodW5pcXVlKGRmJEVkdWNhdGlvbikpCnByaW50KGxlbmd0aCh1bmlxdWUoZGYkRWR1Y2F0aW9uKSkpCmBgYApgYGB7cn0KZWR1Lm51bSA8LSByZXZhbHVlKHggPSBkZiRFZHVjYXRpb24sIHJlcGxhY2UgPSBjKCdObyBTY2hvb2xpbmcgQ29tcGxldGVkJz0gMCwgJ051cnNlcnkgU2Nob29s
IHRvIDh0aCBHcmFkZSc9IDEsICc5dGggR3JhZGUgdG8gMTJ0aCBHcmFkZSwgTm8gRGlwbG9tYSc9IDIsICdHRUQgb3IgQWx0ZXJuYXRpdmUgQ3JlZGVudGlhbCc9IDMsICdSZWd1bGFyIEhpZ2ggU2Nob29sIERpcGxvbWEnPSA0LCAnU29tZSBDb2xsZWdlLCBMZXNzIHRoYW4gMSBZZWFyJz0gNSwgJ1NvbWUgQ29sbGVnZSwgMSBvciBNb3JlIFllYXJzLCBObyBEZWdyZWUnPSA2LCAiQXNzb2NpYXRlJ3MgRGVncmVlIj0gNywgJ1Byb2Zlc3Npb25hbCBTY2hvb2wgRGVncmVlJz0gOCwgIkJhY2hlbG9yJ3MgRGVncmVlIj0gOSwgIk1hc3RlcidzIERlZ3JlZSI9IDEwLCAnRG9jdG9yYXRlIERlZ3JlZSc9IDExKSkKZGYkRWR1Y2F0aW9uX251bWVyaWMgPC0gYXMubnVtZXJpYyhlZHUubnVtKQpgYGAKYGBge3J9CiNDaGVjayB0aGF0IEVkdWNhdGlvbl9udW1lcmljIHdhcyBjcmVhdGVkIGNvcnJlY3RseS4KdW5pcXVlKGRmJEVkdWNhdGlvbl9udW1lcmljKQpgYGAKYGBge3J9CiNQcmluY2lwYWwgQ29tcG9uZW50IEFuYWx5c2lzCmRhdGEuZnJhbWUoY29sbmFtZXMoZGYpKQpgYGAKYGBge3J9CiNQcmluY2lwYWwgQ29tcG9uZW50IEFuYWx5c2lzCmZlYXR1cmVzIDwtIGRmWyxjKDgsIDksIDEwLCAxMSwgMTUsIDE2LCAxOSwgMjMsIDI0LCAyNSwgMjYsIDQyLCA0MywgNDQsIDQ1LCA0NiwgNDcsIDQ4LCA0OSwgNTAsIDUxLCA1MildCnByaW50KGZlYXR1cmVzKQpgYGAKYGBge3J9CmRmLnBjYTwtIHByY29tcChkZlssYyg4LCA5LCAxMCwgMTEsIDE1LCAxNiwgMTksIDIzLCAyNCwgMjUsIDI2LCA0MiwgNDMsIDQ0LCA0NSwgNDYsIDQ3LCA0OCwgNDksIDUwLCA1MSwgNTIpXSwgY2VudGVyID0gVFJVRSwgc2NhbGUgPSBUUlVFKQpgYGAKYGBge3J9CmRmLnBjYSRyb3RhdGlvbgpgYGAKYGBge3J9CmZ2aXpfZWlnKGRmLnBjYSwgY2hvaWNlID0gImVpZ2VudmFsdWUiLCBhZGRsYWJlbHMgPSBUUlVFKQpgYGAKCg==